Project: Day17_Reprogramming_scRNA_seq

Lab: Ralston

Set up

Make project directories

project.dir = paste0(params$working_dir, params$project_name)
if(!dir.exists(project.dir)){dir.create(project.dir)}

data.dir = paste0(project.dir, "/data")
if(!dir.exists(data.dir)){dir.create(data.dir)}

results.dir = paste0(project.dir, "/results")
if(!dir.exists(results.dir)){dir.create(results.dir)}

if(params$goi != "none"){
  gene.dir = paste0(results.dir, "/", params$goi, "_expressing_cells")
  } else {
     gene.dir = paste0(results.dir, "/all_cells")}

if(!dir.exists(gene.dir)){dir.create(gene.dir)}

Set seed and sources

knitr::opts_knit$set(root.dir = params$working_dir)
set.seed(1)

source(paste0(project.dir, "/src/scRNA_seq_functions.R"))

Load the necessary libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.0 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(Seurat)
## Attaching SeuratObject
## Attaching sp
library(patchwork)
library(clustree)
## Loading required package: ggraph
## 
## Attaching package: 'ggraph'
## 
## The following object is masked from 'package:sp':
## 
##     geometry
library(filesstrings)
## 
## Attaching package: 'filesstrings'
## 
## The following object is masked from 'package:dplyr':
## 
##     all_equal
library(Matrix)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

Normalize data

load QC'd Seurat object

load(paste0(data.dir, "/", params$seuratOb))
seuratOb
## An object of class Seurat 
## 22405 features across 4570 samples within 1 assay 
## Active assay: RNA (22405 features, 0 variable features)

scTransform

seuratOb = SCTransform(seuratOb, 
                       method = "glmGamPoi", 
                       vars.to.regress = "percent.mt", 
                       verbose = FALSE)

Cell-cycle scoring

A list of human cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. These have been converted to the mouse version of the genes. We'll use the expression of these genes to estimate the cell-cycle phase of each cell.

# This is how I got the cc genes, but it takes too long to run everytime

# https://satijalab.org/seurat/archive/v3.1/cell_cycle_vignette.html
# A list of human cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat.  
# We can segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

# Basic function to convert human to mouse gene names
convertHumanGeneList <- function(x){
  require("biomaRt")
  human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
  mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
  genesV2 = getLDS(attributes = c("hgnc_symbol"), 
                   filters = "hgnc_symbol", 
                   values = x , 
                   mart = human, 
                   attributesL = c("mgi_symbol"), 
                   martL = mouse, 
                   uniqueRows=T)
  return(genesV2)
}

s.mouse = convertHumanGeneList(s.genes)
s.mouse = s.mouse$MGI.symbol

g2m.mouse = convertHumanGeneList(g2m.genes)
g2m.mouse = g2m.mouse$MGI.symbol

Cell cycle genes

s.mouse = c("Exo1", "Msh2","Mcm4","Gmnn","Rrm2", 
            "Chaf1b","Mcm2","Slbp","Hells", "Cdc6", 
            "Rpa2","Gins2", "Ubr7","Uhrf1", "Fen1", 
            "Cdc45", "Rad51ap1", "Cdca7", "Dtl", "Nasp", 
            "Clspn", "Ung", "Usp1","Dscc1", "Prim1",
            "Wdr76", "Tipin", "Mcm6","Pola1", "Mcm5", 
            "Blm", "Casp8ap2", "Tyms", "E2f8","Rfc2", 
            "Rad51", "Pcna","Ccne2", "Rrm1","Brip1")

g2m.mouse = c("Dlgap5","Ctcf", "Smc4", "Kif20b","Gtse1",
              "Cdc25c","Cdca3","Tacc3","Ttk","Tpx2",
              "Top2a","Cks1brt", "G2e3", "Ckap2","Hmgb2",
              "Anp32e","Lbr","Ncapd2","Cks2", "Cdca2",
              "Psrc1","Cenpe","Ckap5","Nuf2", "Ect2",
              "Anln", "Kif2c","Cenpf","Cks1b","Rangap1",
              "Birc5","Cdca8","Kif11","Hmmr", "Aurkb",
              "Cdc20","Ndc80","Kif23","Nek2", "Hjurp",
              "Bub1", "Nusap1","Aurka","Ccnb2","Tubb4b", 
              "Ube2c","Ckap2l","Cenpa","Mki67" )
  
seuratOb = CellCycleScoring(seuratOb, 
                 s.features = s.mouse, 
                 g2m.features = g2m.mouse, 
                 set.ident = FALSE)

Re-normalize and regress out the cell-cycle scores

seuratOb <- SCTransform(seuratOb, 
                        method = "glmGamPoi",
                        vars.to.regress = c("percent.mt", "S.Score", "G2M.Score"),
                        verbose = FALSE)

Dimensionality reduction

Perform dimensionality reduction by PCA and UMAP

seuratOb <- RunPCA(seuratOb, verbose = FALSE)
seuratOb <- RunUMAP(seuratOb, 
                    dims = 1:30, 
                    verbose = FALSE)

Clustering

Cluster the cells with multiple resolution parameters and visualize the divisions with clustree

seuratOb <- FindNeighbors(seuratOb, 
                          dims = 1:30, 
                          verbose = FALSE)

# test resolutions from 0 to 1
res_vec = seq(0, 1.2, .1)
seuratOb <- FindClusters(seuratOb, 
                         verbose = FALSE, 
                         resolution = res_vec)
# save plot
if(params$goi == "none"){
pdf(file = paste0(results.dir, "/clustree_all_cells.pdf"),
    width = 6,
    height = 10)
print(clustree(seuratOb, prefix = "SCT_snn_res."))
dev.off()
} else{
  pdf(file = paste0(results.dir, "/clustree_", params$goi,"_expressing.pdf"),
    width = 6,
    height = 10)
print(clustree(seuratOb, prefix = "SCT_snn_res."))
dev.off()
}
## png 
##   2
# print clustree
clustree(seuratOb, prefix = "SCT_snn_res.")

Each row of circles on the clustree plot represents cells clustered at the resolution denoted by SCT_snn_res. Each circle represents a cluster. The arrows show how the cells move from cluster to cluster at different resolutions. in_prop show the proportion of the cells if the receiving cluster that came from the donating cluster. Make plots for all of them.

Save the Seurat Object

if(params$goi != "none"){
  
  save(seuratOb,  
     file = paste0(data.dir,
                   "/", 
                   params$project_name, 
                   "_",
                   params$goi,
                   "_expressing_QC_filtered_SeuratObject.Rdata"))
  
  save_path = paste0(data.dir,
                   "/", 
                   params$project_name, 
                   "_",
                   params$goi,
                   "_expressing_QC_filtered_SeuratObject.Rdata")
  
} else {
  save(seuratOb,  
     file = paste0(data.dir,
                   "/", 
                   params$project_name, 
                   "_QC_filtered_SeuratObject.Rdata"))
  
  save_path = paste0(data.dir,
                   "/", 
                   params$project_name, 
                   "_QC_filtered_SeuratObject.Rdata")
    
}

Saved here: /mnt/ufs18/home-115/hickeys6/Documents/Ralston/projects/Day17_Reprogramming_scRNA_seq/data/Day17_Reprogramming_scRNA_seq_QC_filtered_SeuratObject.Rdata

Clustering quality control

I want to check if the clusters represent known causes of variation between otherwise similar cells including:

# resolutions of interest
res_oi = res_vec[2:length(res_vec)]

# loop for making the plots
for(res in res_oi){
  
  # set the identity
  Idents(object = seuratOb) <- paste0("SCT_snn_res.", res)
  
  # cluster plot
  P1 = DimPlot(seuratOb, label = TRUE) + NoLegend()
 
  # UMI counts
  P2 = 
    VlnPlot(seuratOb, 
            features = "nCount_RNA", 
            pt.size = 0) + 
    NoLegend() + 
    ggtitle("UMI Counts") +
    xlab("Cluster") +
    geom_boxplot(width=0.1, color="black")
  
  # n Genes
  P3 = 
    VlnPlot(seuratOb, 
            features = "nFeature_RNA",
            pt.size = 0) + 
    NoLegend() +
    ggtitle("nGenes") +
    xlab("Cluster") +
    geom_boxplot(width=0.1, color="black")
  
  # percent mito
  P4 = 
    VlnPlot(seuratOb, 
            features = "percent.mt",
            pt.size = 0) + 
    NoLegend() +
    ggtitle("Percent mitochondrial reads") +
    xlab("Cluster") +
    geom_boxplot(width=0.1, color="black")
  
  # select the resolution col of interest and the Phase col
  select_cols = c(paste0("SCT_snn_res.", res), "Phase")
  meta_oi = seuratOb@meta.data[,select_cols]
  
  # tally the number of cells in each phase per cluster
  cc_tally = 
    meta_oi %>%
    group_by(across()) %>%
    tally()
  colnames(cc_tally) = c("Cluster", "Phase", "nCells")
  
  # all the tally of phases across all cells
  all_tally = 
    meta_oi %>%
    group_by(Phase) %>%
    tally() %>%
    mutate(Cluster = "All.Cells") %>%
    select(Cluster,
           Phase,
           n)
  colnames(all_tally) = c("Cluster", "Phase", "nCells")
  
  # r bind all cells and cluster-wise tally
  cc_tally = rbind(cc_tally, all_tally)
  
  # stacked bar plot with proportion of cells in each cell-cycle phase
  P5 = ggplot(cc_tally, 
              aes(fill = Phase,
                  y = nCells,
                  x = Cluster)) +
    geom_bar(position="fill", 
             stat="identity") +
    theme_classic() +
    ggtitle("Proportion cells in each phase") +
    ylab("proportion of cells")
  
  # combine the plots with patchwork
  Pall = 
    ((P1/P5) | (P2 + P3 + P4)) +  
    plot_annotation(title = paste0("resolution = ", res))
  
  # Print the plot
  print(Pall)
  
  # save the plot
  if(params$goi == "none"){
    
    ggsave(Pall, 
         file = paste0(gene.dir, 
                       "/all_cells_cluster_QC_plots_res", 
                       res, 
                       ".png"), 
         width = 9, 
         height = 6,
         dpi = 300)
  
  } else {
     ggsave(Pall, 
         file = paste0(gene.dir, 
                       "/", 
                       params$goi, 
                       "_expressing_cells_cluster_QC_plots_res", 
                       res, ".png"), 
         width = 9, 
         height = 6,
         dpi = 300)
    }
  }

Session info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /opt/software/OpenBLAS/0.3.7-GCC-8.3.0/lib/libopenblas_sandybridgep-r0.3.7.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Matrix_1.4-1       filesstrings_3.2.3 clustree_0.5.0     ggraph_2.0.6      
##  [5] patchwork_1.1.2    sp_1.5-0           SeuratObject_4.1.1 Seurat_4.1.1      
##  [9] forcats_0.5.2      stringr_1.4.0      dplyr_1.0.10       purrr_0.3.4       
## [13] readr_2.1.2        tidyr_1.2.0        tibble_3.1.8       ggplot2_3.3.6     
## [17] tidyverse_1.3.2   
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                  reticulate_1.26            
##   [3] tidyselect_1.1.2            htmlwidgets_1.5.4          
##   [5] grid_4.1.0                  Rtsne_0.15                 
##   [7] munsell_0.5.0               codetools_0.2-18           
##   [9] ragg_1.2.2                  ica_1.0-3                  
##  [11] future_1.28.0               miniUI_0.1.1.1             
##  [13] withr_2.5.0                 spatstat.random_2.2-0      
##  [15] colorspace_2.0-3            progressr_0.11.0           
##  [17] Biobase_2.52.0              highr_0.9                  
##  [19] knitr_1.40                  stats4_4.1.0               
##  [21] ROCR_1.0-11                 tensor_1.5                 
##  [23] listenv_0.8.0               MatrixGenerics_1.4.0       
##  [25] labeling_0.4.2              GenomeInfoDbData_1.2.6     
##  [27] polyclip_1.10-0             farver_2.1.1               
##  [29] parallelly_1.32.1           vctrs_0.4.1                
##  [31] generics_0.1.3              xfun_0.32                  
##  [33] R6_2.5.1                    GenomeInfoDb_1.28.0        
##  [35] graphlayouts_0.8.1          bitops_1.0-7               
##  [37] spatstat.utils_2.3-1        cachem_1.0.6               
##  [39] DelayedArray_0.18.0         assertthat_0.2.1           
##  [41] promises_1.2.0.1            scales_1.2.1               
##  [43] googlesheets4_1.0.1         rgeos_0.5-9                
##  [45] gtable_0.3.0                globals_0.16.1             
##  [47] goftest_1.2-3               tidygraph_1.2.2            
##  [49] rlang_1.0.5                 systemfonts_1.0.4          
##  [51] splines_4.1.0               lazyeval_0.2.2             
##  [53] gargle_1.2.0                spatstat.geom_2.4-0        
##  [55] broom_1.0.1                 checkmate_2.1.0            
##  [57] yaml_2.3.5                  reshape2_1.4.4             
##  [59] abind_1.4-5                 modelr_0.1.9               
##  [61] backports_1.4.1             httpuv_1.6.5               
##  [63] tools_4.1.0                 ellipsis_0.3.2             
##  [65] spatstat.core_2.4-4         jquerylib_0.1.4            
##  [67] RColorBrewer_1.1-3          BiocGenerics_0.38.0        
##  [69] ggridges_0.5.3              Rcpp_1.0.9                 
##  [71] plyr_1.8.7                  sparseMatrixStats_1.4.0    
##  [73] zlibbioc_1.38.0             RCurl_1.98-1.8             
##  [75] rpart_4.1-15                deldir_1.0-6               
##  [77] pbapply_1.5-0               viridis_0.6.2              
##  [79] cowplot_1.1.1               S4Vectors_0.30.0           
##  [81] zoo_1.8-9                   SummarizedExperiment_1.22.0
##  [83] haven_2.5.1                 ggrepel_0.9.1              
##  [85] cluster_2.1.2               fs_1.5.2                   
##  [87] magrittr_2.0.3              data.table_1.14.2          
##  [89] glmGamPoi_1.6.0             scattermore_0.8            
##  [91] lmtest_0.9-40               reprex_2.0.2               
##  [93] RANN_2.6.1                  googledrive_2.0.0          
##  [95] fitdistrplus_1.1-8          matrixStats_0.62.0         
##  [97] hms_1.1.2                   mime_0.12                  
##  [99] evaluate_0.16               xtable_1.8-4               
## [101] readxl_1.4.1                IRanges_2.26.0             
## [103] gridExtra_2.3               compiler_4.1.0             
## [105] KernSmooth_2.23-20          crayon_1.5.1               
## [107] htmltools_0.5.3             mgcv_1.8-40                
## [109] later_1.3.0                 tzdb_0.3.0                 
## [111] lubridate_1.8.0             DBI_1.1.3                  
## [113] tweenr_2.0.2                dbplyr_2.2.1               
## [115] MASS_7.3-58.1               cli_3.3.0                  
## [117] parallel_4.1.0              igraph_1.3.4               
## [119] GenomicRanges_1.44.0        pkgconfig_2.0.3            
## [121] plotly_4.10.0               spatstat.sparse_2.1-1      
## [123] xml2_1.3.3                  bslib_0.4.0                
## [125] XVector_0.32.0              rvest_1.0.3                
## [127] digest_0.6.29               sctransform_0.3.4          
## [129] RcppAnnoy_0.0.19            spatstat.data_2.2-0        
## [131] rmarkdown_2.16              cellranger_1.1.0           
## [133] leiden_0.4.2                uwot_0.1.14                
## [135] DelayedMatrixStats_1.14.0   shiny_1.7.2                
## [137] lifecycle_1.0.1             nlme_3.1-159               
## [139] jsonlite_1.8.0              viridisLite_0.4.0          
## [141] fansi_1.0.3                 pillar_1.8.1               
## [143] lattice_0.20-45             fastmap_1.1.0              
## [145] httr_1.4.4                  survival_3.4-0             
## [147] glue_1.6.2                  png_0.1-7                  
## [149] ggforce_0.3.4               stringi_1.7.8              
## [151] sass_0.4.2                  textshaping_0.3.6          
## [153] strex_1.4.3                 irlba_2.3.5                
## [155] future.apply_1.9.0